Author

Peter Youyun Zheng

Published

July 18, 2024

Introduction

In this doc, we will be looking at the banksy clusters and try to interpret them based on their identity, and other characteristics.

Load the data

The objects are here:

  1. RDS object with Leiden Clusters: /xchip/beroukhimlab/coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_clusters_20240711_135041.rds

    • /xchip/beroukhimlab/coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_embeddings/total_spatial_banksy_clusters_subset_20240711_135041.rds
  2. RDS object with cell type cluster markers: /xchip/beroukhimlab/coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_clusters_markers_20240711_135041.rds

Metadata is here:

  1. /Users/youyun/Documents/HMS/PhD/beroukhimlab/broad_mount/coja/Spatial_PLGG/data/metadata/Xenium_PS.xlsx
Code
# banksy_embeddings = readRDS(paste0(
#     workdir,'coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_clusters_subset_20240711_135041.rds'
# ))
# colData(banksy_embeddings)$sample_id = factor(colData(banksy_embeddings)$sample_id)

# # generate some leiden clusters at varying resolution
# lambda = c(0.2, 0.8)
# SEED = 55555
# banksy_embeddings <- Banksy::clusterBanksy(
#     banksy_embeddings, dimred = "Harmony_BANKSY_lam0.2", resolution = c(1,0.5), 
#     algo = 'leiden',seed = SEED
# )
# banksy_embeddings <- Banksy::clusterBanksy(
#     banksy_embeddings, dimred = "Harmony_BANKSY_lam0.8", resolution = c(1,0.5), 
#     algo = 'leiden',seed = SEED
# )

# # getting rid of the previously generated clusters because their resolution is too high
# colData(banksy_embeddings)[,c(
#     'clust_Harmony_BANKSY_lam0.2_k50_res1.2','clust_Harmony_BANKSY_lam0.8_k50_res1.2'
# )] = NULL

# # Different clustering runs can be relabeled to minimise their differences with connectClusters:
# banksy_embeddings <- Banksy::connectClusters(banksy_embeddings, map_to = clusterNames(banksy_embeddings)[1])

# # save the clusters
# saveRDS(banksy_embeddings, paste0(
#     workdir,'coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_clusters_subset_20240711_135041_reclustered.rds'
# ))

banksy_embeddings = readRDS(paste0(
    workdir,'coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_clusters_subset_20240711_135041_reclustered.rds'
))

cell_type_markers = fread(paste0(
    workdir,'coja/Spatial_PLGG/data/markers/final_manual_markers.csv'
))
xenium_markers = fread(paste0(
    workdir, 'youyun/plgg/data/xenium_selection/Xenium_hBrain_v1_metadata.csv'
))

Looking at the harmony corrected embeddings

We will first look at the extent of batch effect in the cohort by looking at the embeddings before and after harmony correction.

Code
plot_grid(
    ggdraw() + draw_label("BANKSY Embedding UMAP Before/After Harmony (Cell Identity)", fontface='bold'),
    plot_grid(
        plotReducedDim(
            banksy_embeddings, "UMAP_M1_lam0.2", point_size = 0.1,
            point_alpha = 0.5, color_by = "sample_id"
        ) +
            theme(legend.position = "none"),
        plotReducedDim(
            banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.2", 
            point_size = 0.1,point_alpha = 0.5, color_by = "sample_id"
        ) +
            theme(legend.title = element_blank()) +
            guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))),
        nrow = 1,
        rel_widths = c(1, 1.2)
    ), ncol = 1,rel_heights = c(0.1, 5)
)

Code
plot_grid(
    ggdraw() + draw_label("BANKSY Embedding UMAP Before/After Harmony (Neighborhood)", fontface='bold'),
    plot_grid(
        plotReducedDim(
            banksy_embeddings, "UMAP_M1_lam0.8", point_size = 0.1,
            point_alpha = 0.5, color_by = "sample_id"
        ) +
            theme(legend.position = "none"),
        plotReducedDim(
            banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.8", 
            point_size = 0.1,point_alpha = 0.5, color_by = "sample_id"
        ) +
            theme(legend.title = element_blank()) +
            guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))),
        nrow = 1,
        rel_widths = c(1, 1.2)
    ), ncol = 1,rel_heights = c(0.1, 5)
)

Generate the clusters at different resolution and look at them

It seems like resolution = 1 for leiden clustering is the sweet spot. So we will use that

Code
plot_grid(
    ggdraw() + draw_label("BANKSY Embedding UMAP Before/After Harmony (Cell Identity)", fontface='bold'),
    plot_grid(
        plotReducedDim(
            banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.2", 
            point_size = 0.1, point_alpha = 0.5, 
            color_by = "clust_Harmony_BANKSY_lam0.2_k50_res1"
        ) +
            theme(legend.title = element_blank()) +
            guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) +
            labs(title = 'Leiden Resolution 1'),
        plotReducedDim(
            banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.2", 
            point_size = 0.1, point_alpha = 0.5, 
            color_by = "clust_Harmony_BANKSY_lam0.2_k50_res0.5"
        ) +
            theme(legend.title = element_blank()) +
            guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) +
            labs(title = 'Leiden Resolution 0.5'),
        # plotReducedDim(
        #     banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.2", 
        #     point_size = 0.1,point_alpha = 0.5, 
        #     color_by = "histology"
        # ) +
        #     theme(legend.title = element_blank()) +
        #     guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))),
        nrow = 1,
        rel_widths = c(1, 1, 1)
    ), ncol = 1,rel_heights = c(0.1, 5)
)

Code
plot_grid(
    ggdraw() + draw_label("BANKSY Embedding UMAP Before/After Harmony (Neighborhood)", fontface='bold'),
    plot_grid(
        plotReducedDim(
            banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.8", 
            point_size = 0.1,point_alpha = 0.5, 
            color_by = "clust_Harmony_BANKSY_lam0.8_k50_res1"
        ) +
            theme(legend.title = element_blank()) +
            guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) +
            labs(title = 'Leiden Resolution 1'),
        plotReducedDim(
            banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.8", 
            point_size = 0.1,point_alpha = 0.5, 
            color_by = "clust_Harmony_BANKSY_lam0.8_k50_res0.5"
        ) +
            theme(legend.title = element_blank()) +
            guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) +
            labs(title = 'Leiden Resolution 0.5'),
        # plotReducedDim(
        #     banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.8", 
        #     point_size = 0.1,point_alpha = 0.5, 
        #     color_by = "histology"
        # ) +
        #     theme(legend.title = element_blank()) +
        #     guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))),
        nrow = 1,
        rel_widths = c(1, 1, 1)
    ), ncol = 1,rel_heights = c(0.1, 5)
)

What are the cell types in the clusters?

Manually- and Xenium- annotated cell type markers

Code
kable(rbind(cell_type_markers[
    category == 'Cell Type' & xenium == TRUE,
    .(markers = paste0(marker, collapse = ', '), source = 'manual'),.(annotation)
],xenium_markers[
    ,.(markers = paste0(Genes, collapse = ', '), source = 'xenium'),.(annotation = Annotation)
]))

markers_to_score = rbind(cell_type_markers[
    category == 'Cell Type' & xenium == TRUE,
    .(markers = marker),.(annotation = paste0(annotation, '_manual'))
],xenium_markers[
    ,.(markers = Genes),.(annotation = paste0(Annotation, '_xenium'))
])[
    ,annotation := gsub(' ','_',annotation)
]

# Scoring the cells by cell type markers
cell_types = unique(markers_to_score$annotation)
marker_score = do.call('cbind',lapply(cell_types, function(x){
    genes_of_interest = markers_to_score[annotation == x]$markers
    cell_scores = log10(colSums(
        assay(banksy_embeddings[genes_of_interest,], 'normcounts')
    )/length(genes_of_interest) + 1)
    cell_scores_df = data.frame(cell_scores)
    rownames(cell_scores_df) = names(cell_scores)
    colnames(cell_scores_df) = x
    cell_scores_df
}))
colData(banksy_embeddings)[,cell_types] = NULL
colData(banksy_embeddings) = cbind(colData(banksy_embeddings),marker_score)

# make the plots
a = lapply(cell_types, function(x){
    assign(
        paste0('plot_',x),
        plotReducedDim(
            banksy_embeddings, "UMAP_Harmony_BANKSY_lam0.2", 
            point_size = 0.1, point_alpha = 0.5, 
            color_by = x
        ) +
            theme(legend.title = element_blank()) +
            guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) +
            labs(title = x),
        envir = .GlobalEnv
    )
})
annotation markers source
Stromal S100A4, DCN manual
OPC OLIG2, PDGFRA, OLIG1, SOX10 manual
Progenitor Glia TOP2A manual
Oligodendrocyte MAG, MOG, OLIG1, MYRF manual
Endothelial PECAM1 manual
Microglia AIF1, PTPRC manual
Astrocyte AQP4, APOE manual
T_cell PTPRC, CD52 manual
RGC PAX6, HES1, CDH4, PTPRZ1 manual
VLMC ABCC9, ADAMTS12, ANO3, CDH6, CEMIP, COL12A1, CSPG4, DCN, FBLN1, IGFBP4, LAMA2, MCTP2, NR2F2, PHLDB2, PLCE1, RFTN1, SERPINA3, TMEM132C xenium
L4 IT ADAMTS16, FSTL4, OTOGL, POU6F2, RORB, SLC17A6 xenium
L6 IT ADAMTS3, C1QL3, SNTB2 xenium
Sncg ADRA1A, ADRA1B, LYPD6B, PROX1, THSD7B xenium
Glioblastoma (TME) AIF1, C1orf162, CAPG, CCL5, CD14, CD163, CD2, CD3G, CD4, CD48, CD52, CD68, CD83, CORO1A, CTSS, CX3CR1, CXCR4, CYTIP, FCER1G, FCGR1A, FCGR3A, GNLY, GPNMB, GPR183, GPR34, GZMA, HLA-DMB, IDO1, IL7R, ITGB2, KLF2, KLF4, KLRB1, LY86, MS4A6A, NKG7, NR4A2, RGS10, RNASET2, S100A4, STXBP2, TGFBI, TMIGD3, TRAC xenium
Chandelier ALK, ANK1, CNTNAP3B, LHX6, NPNT, PVALB, SDK1, TENM1, UNC5B xenium
Pax6 ANGPT1, CCK, CDH4, CXCL14, DDR2, PLD5, RELN, SFRP2, WIF1 xenium
Sst Chodl ANKRD18A, CRHBP, NDST4, NXPH2, SST, TAC1, TACR1, TRPC6 xenium
Glioblastoma (Cancer cells) ANXA1, B4GALNT1, BCAN, CAV1, CHODL, EGFR, ELOVL2, HES1, HILPDA, IDH1, IDH2, IGFBP3, IGFBP5, LOX, MGST1, NNAT, PSEN2, PSENEN, SOX11, SOX2, SOX4, TP53, TRIL, TTYH1 xenium
Microglia-PVM APOE, ARHGAP24, CCL4, CD86, CTSH, FASLG, HLA-DQA1, IFITM3, ITGAM, ITGAX, LRRK1, LYVE1, P2RY12, P2RY13, PTPRC, RGS16, SPI1, TGFB1, TREM2 xenium
Broad APP xenium
Astrocyte AQP4, FGFR3, GJA1, MEIS2, NES, PAX6, RYR3, SOX9, SPON1, TGFB2, THBS1 xenium
L5/6 NP ATP2C2, CD36, CRYM, HTR2C, TPH2, TSHZ2, VWC2L xenium
OPC BRINP3, CALCRL, ITGA8, LRRK2, OLIG1, OLIG2, PDGFRA, PTPRZ1, SEMA5A, SOX10, STK32B, VCAN xenium
Pvalb BTBD11, GAD1, MEPE, MYO5B, RSPO2, SAMD5, SULF1 xenium
Oligodendrocyte CAPN3, CDH1, CLDN11, CNDP1, CNTN2, CTNNA3, EFHD1, ERBB3, ERMN, FGFR2, HHATL, KLK6, MAG, MAL, MOBP, MOG, MYRF, NCSTN, OPALIN, PCSK6, PSEN1, ST18, UGT8 xenium
Proliferation CCNA1, CCNB2, CDK1, CENPF, KIT, MKI67, PCNA, TOP2A xenium
L6 IT Car3 CDH12, GAS2L3, NPY1R, NTNG2, NWD2, POSTN, RASGRP1, SLC26A4, THEMIS, TRPC5, ZDHHC23 xenium
Endothelial CEMIP2, FLT1, NOTCH1, NRP1, PECAM1, RNF144B, STAT3, THSD4 xenium
Sst COL25A1, PLCH1, SYNPR, TRHDE xenium
L2/3 IT CUX2, TESPA1, ZBBX xenium
Lamp5 DNER, PTCHD4 xenium
Lamp5 Lhx6 EYA4, GAD2, LAMP5, LYPD6, MYO16, NTNG1, PDGFD, SPHKAP xenium
L6 CT FILIP1, HS3ST2, HS3ST4, KCNH5 xenium
L5 IT HTR2A, RXFP1 xenium
L6b NPFFR2, ROS1, SLC17A7 xenium
L5 ET PCSK1, RIT2, SLIT3, SNCG, SORCS1 xenium
Vip SLC24A3, VIP xenium

We are going to annotate 37 cell types. and we will now visualize the marker scores.

We can start to appreciate that certain markers are specific to certain clusters:

  • Cluster 1 is Pax6+ cells
  • Cluster 2 is Pax6+ cells with strong GBM Cancer Cell markers (xenium)
  • Cluster 3 is astrocytes
  • Cluster 5 is microglia/GBM TME (xenium)
  • Cluster 6 is OPC (potentially also oligodendrocytes)
  • Cluster 8 is also Pax6+ cells
  • Cluster 9 is VLMC, stromal and endothelial cell types
  • Cluster 11 has a minor progenitor glia population also high in proliferation gene markers (xenium)
  • Cluster 13 is also microglia
  • Cluster 14 is oligodendrocytes
Code
plot_grid(
    ggdraw() + draw_label("BANKSY Embedding UMAP by Manually Curated Markers", fontface='bold'),
    plot_grid(
        plotlist = mget(paste0('plot_',grep('_manual',cell_types, value = TRUE))),
        nrow = 3,
        rel_widths = rep(1,sum(grepl('_manual',cell_types)))
    ), ncol = 1,rel_heights = c(0.1, 5)
)

Code
plot_grid(
    ggdraw() + draw_label("BANKSY Embedding UMAP by Xenium Markers", fontface='bold'),
    plot_grid(
        plotlist = mget(paste0('plot_',grep('_xenium',cell_types, value = TRUE))),
        nrow = 5,
        rel_widths = rep(1,sum(grepl('_xenium',cell_types)))
    ), ncol = 1,rel_heights = c(0.1, 5)
)

Code
cell_score_dt = colData(banksy_embeddings)[,c(
    cell_types, 'clust_Harmony_BANKSY_lam0.2_k50_res1'
)]
cell_score_dt$cell_sample_id = rownames(cell_score_dt)
cell_score_dt = melt(as.data.table(cell_score_dt), id.vars = c('cell_sample_id','clust_Harmony_BANKSY_lam0.2_k50_res1'))

cell_types_to_exclude = c('Glioblastoma_.Cancer_cells._xenium','Broad_xenium','Astrocyte_xenium')
cell_type_pal = as.character(pals::polychrome(length(
    unique(cell_score_dt[!variable %in% cell_types_to_exclude]$variable)
)))
ggplot(
    cell_score_dt[!variable %in% cell_types_to_exclude],
    aes(x = value, fill = variable)
) + geom_density(alpha = 0.2) +
    facet_wrap(~clust_Harmony_BANKSY_lam0.2_k50_res1, scales = 'free') + 
    theme_minimal() + xlim(0,0.75) + ylim(0,5) +
    scale_fill_manual(values = cell_type_pal)
Warning: Removed 5797 rows containing non-finite outside the scale range (`stat_density()`).

Code
cluster_pal = as.character(pals::alphabet(length(unique(cell_score_dt$clust_Harmony_BANKSY_lam0.2_k50_res1))))
ggplot(
    cell_score_dt[!variable %in% c('Glioblastoma_.Cancer_cells._xenium','Broad_xenium')],
    aes(x = value, fill = clust_Harmony_BANKSY_lam0.2_k50_res1)
) + geom_density(alpha = 0.5) + 
    facet_wrap(~variable, scales = 'free') + 
    theme_minimal() + ylim(0,6) +
    scale_fill_manual(values = cluster_pal) +
    labs(fill = 'Cluster ID')

Code
scran_cell_markers = findMarkers(
    assay(banksy_embeddings, "counts"),
    groups = banksy_embeddings$clust_Harmony_BANKSY_lam0.2_k50_res1,
    test.type="wilcox", direction="up", lfc = 0.5, pval.type="some"
)
lapply(scran_cell_markers, function(x) rownames(x[x$FDR <= 0.25,]))
$`1`
[1] "RELN"   "FGFR3"  "APP"    "IGFBP5" "SOX4"   "SOX9"  

$`2`
[1] "RELN" "NNAT"

$`3`
[1] "AQP4"   "APOE"   "PTPRZ1" "GJA1"   "OLIG1"  "SOX9"   "TTYH1" 

$`4`
[1] "PAX6"  "LAMP5" "FGFR3" "SOX4" 

$`5`
character(0)

$`6`
 [1] "PTPRZ1"   "OLIG1"    "SEMA5A"   "OLIG2"    "APOE"     "VCAN"     "PDGFRA"   "SOX10"    "BCAN"     "APP"     
[11] "DNER"     "SERPINA3" "NES"      "GAD1"     "STAT3"    "POSTN"    "IDH2"     "UGT8"     "AQP4"     "IGFBP5"  
[21] "NTNG1"    "TMEM132C"

$`7`
character(0)

$`8`
[1] "PAX6"   "CXCL14" "SYNPR"  "RGS16"  "LAMP5"  "DNER"   "FGFR3" 

$`9`
[1] "FLT1"

$`10`
[1] "RELN"

$`11`
[1] "RELN"

$`12`
[1] "RELN"  "FGFR3"

$`13`
[1] "CD83"

$`14`
character(0)

Looking at the cell types + niches in space

Code
graph_dt = melt(
    as.data.table(cbind(
        spatialCoords(banksy_embeddings),
        colData(banksy_embeddings)[c(
            'sample_id','cell_id',
            grep('Harmony.*.res1$',clusterNames(banksy_embeddings), value = TRUE)
        )]
    )),id.vars = c('sample_id','cell_id','sdimx','sdimy'),
    variable.name = 'Cluster Type',value.name = 'Cluster ID'
)[,sample_id := gsub('output-XETG00078__','',sample_id)]
graph_dt

cluster_classes = sort(unique(graph_dt$`Cluster ID`))
color_manual = structure(
    pals::polychrome()[c(1:length(cluster_classes))],
    names = cluster_classes
)

graph_dt[,.(min_x = min(sdimx)), by = c('sample_id')][order(min_x)]

ggplot(graph_dt, aes(x=sdimy, y=sdimx, color=`Cluster ID`)) +
    facet_wrap(`Cluster Type`~sample_id, nrow = 2, scale = 'free_x') +
    geom_point(size = 0.3) + 
    scale_color_manual(values = color_manual) +
    theme_classic() + 
    theme(
        legend.position = "bottom",
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks=element_blank()
    ) + guides(
        color = guide_legend(nrow = 3, byrow = TRUE)
    ) +
    coord_flip()

    sample_id     min_x
       <char>     <num>
 1:     42112  14616.65
 2:     51420  28491.42
 3:     53398  42717.19
 4:     67068  57325.54
 5:     87352  71228.25
 6:    188570  85592.35
 7:    240468  99684.05
 8:    241670 113911.71
 9:    258482 128370.33
10:    320784 142408.38
11:    328368 156628.75